if (!file.exists("met_all.gz"))
download.file(
url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
destfile = "met_all.gz",
method = "libcurl",
timeout = 60
)
met <- data.table::fread("met_all.gz")lab04
Question 1- Read in the Data
Question 2 - Prepare the data
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.3 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
met <- met[met$temp > -10][elev == 9999.0, elev := NA]met = mutate(met, date = as.Date(paste(year, month, day, sep = "-")))met$week = week(met$date)met %>%
filter(day==1:7)Warning: There was 1 warning in `filter()`.
ℹ In argument: `day == 1:7`.
Caused by warning in `day == 1:7`:
! longer object length is not a multiple of shorter object length
USAFID WBAN year month day hour min lat lon elev wind.dir
1: 690150 93121 2019 8 1 0 56 34.30 -116.166 696 220
2: 690150 93121 2019 8 1 7 56 34.30 -116.166 696 10
3: 690150 93121 2019 8 1 14 56 34.30 -116.166 696 NA
4: 690150 93121 2019 8 1 21 56 34.30 -116.166 696 200
5: 690150 93121 2019 8 2 5 56 34.30 -116.166 696 260
---
74528: 726813 94195 2019 8 6 14 56 43.65 -116.633 741 20
74529: 726813 94195 2019 8 6 21 56 43.65 -116.633 741 320
74530: 726813 94195 2019 8 7 5 56 43.65 -116.633 741 NA
74531: 726813 94195 2019 8 7 12 56 43.65 -116.633 741 40
74532: 726813 94195 2019 8 7 19 56 43.65 -116.633 741 NA
wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
1: 5 N 5.7 5 22000 5
2: 5 N 2.1 5 22000 5
3: 9 C 0.0 5 22000 5
4: 5 N 7.2 5 22000 5
5: 5 N 4.6 5 22000 5
---
74528: 5 N 3.1 5 22000 5
74529: 5 N 3.1 5 22000 5
74530: 9 C 0.0 5 22000 5
74531: 5 N 1.5 5 22000 5
74532: 9 C 0.0 5 22000 5
ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc temp
1: 9 N 16093 5 N 5 37.2
2: 9 N 16093 5 N 5 28.9
3: 9 N 16093 5 N 5 27.8
4: 9 N 16093 5 N 5 40.0
5: 9 N 16093 5 N 5 31.7
---
74528: 9 N 16093 5 N 5 21.1
74529: 9 N 16093 5 N 5 36.1
74530: 9 N 16093 5 N 5 23.9
74531: 9 N 16093 5 N 5 16.7
74532: 9 N 16093 5 N 5 32.2
temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
1: 5 10.6 5 1009.9 5 19.88127
2: 5 6.7 5 1012.8 5 24.58578
3: 5 5.6 5 1015.0 5 24.31468
4: 5 5.0 5 1011.5 5 11.54872
5: 5 3.3 5 1012.0 5 16.42942
---
74528: 5 16.1 5 1011.5 5 73.24593
74529: 5 9.4 5 1010.0 5 19.50586
74530: 5 12.2 5 1009.1 5 48.01237
74531: 5 13.3 5 1010.5 5 80.48975
74532: 5 11.7 5 1011.4 5 28.44852
date week
1: 2019-08-01 31
2: 2019-08-01 31
3: 2019-08-01 31
4: 2019-08-01 31
5: 2019-08-02 31
---
74528: 2019-08-06 32
74529: 2019-08-06 32
74530: 2019-08-07 32
74531: 2019-08-07 32
74532: 2019-08-07 32
rhnum <-as.numeric(met$rh)
tempnum <-as.numeric(met$temp)
windnum <-as.numeric(met$wind.sp)
vistnum <-as.numeric(met$vist.dist)
dewnum <-as.numeric(met$dew.point)
latnum<- as.numeric(met$lat)
lonnum<-as.numeric(met$lon)
elevnum<-as.numeric(met$elev)mean(rhnum)[1] NA
mean(tempnum)[1] 23.59178
mean(windnum)[1] NA
mean(vistnum)[1] NaN
mean(dewnum)[1] NA
mean(latnum)[1] 37.96869
mean(lonnum)[1] -92.14057
mean(elevnum)[1] NA
met$region <- ifelse(met$lon > -98 & met$lat>39.71, "NE",
ifelse(met$lon > -98 & met$lat <39.71, "NW",
ifelse(lon<-98 & met$lat>39.71, "SE", "SW" )))met_avg <- met[,.(
temp = mean(temp,na.rm=TRUE),
rh = mean(rh,na.rm=TRUE),
wind.sp = mean(wind.sp,na.rm=TRUE),
dew.point = mean(dew.point,na.rm=TRUE),
vis.dist = mean(vis.dist,na.rm=TRUE),
lat = mean(lat),
lon = mean(lon),
elev = mean(elev,na.rm=TRUE)
), by=c("USAFID", "day")]
met_avg[, elev_cat := ifelse(elev > 252, "high", "low")]
met_avg[, region := ifelse(lon > -98 & lat>39.71, "NE",
ifelse(lon > -98 & lat <39.71, "NW",
ifelse(lon<-98 & met$lat>39.71, "SE", "SW" )))]
**used chat gpt in code above to figure out the ifelse statement format
##Question 3
ggplot(data = met_avg) +
geom_violin(mapping = aes(x = dew.point, y = wind.sp)) +
facet_wrap(~ region, nrow = 2)Warning: Removed 397 rows containing non-finite values (`stat_ydensity()`).
It seems as though Northeastern regions have a wider range of dew point by wind speed than any other region. Additionally, SW regions have the smallest range of dew point by wind speed. All regions, however, have dew.point averages around the same wind speeds.
Question 4
library("cowplot")
Attaching package: 'cowplot'
The following object is masked from 'package:lubridate':
stamp
met_avg[!is.na(dew.point & wind.sp)] USAFID day temp rh wind.sp dew.point vis.dist lat
1: 690150 1 32.59583 20.65546 2.895833 6.3000000 15958.92 34.29967
2: 690150 2 33.54167 13.18436 4.154167 1.2166667 16093.00 34.30000
3: 690150 3 34.30833 14.20129 4.433333 2.4958333 16025.96 34.30000
4: 690150 4 34.72083 11.70307 4.245833 0.5541667 16093.00 34.30000
5: 690150 5 34.77391 13.20827 3.660870 1.4956522 15953.09 34.30000
---
48317: 726813 27 20.05833 44.96461 1.579167 5.9375000 15891.88 43.65000
48318: 726813 28 21.18333 46.79024 1.175000 6.8041667 15623.67 43.65000
48319: 726813 29 24.32083 49.07651 1.733333 10.8083333 16025.96 43.65000
48320: 726813 30 24.54583 50.43012 3.508333 12.3083333 16025.96 43.64967
48321: 726813 31 24.27917 51.08570 1.208333 11.6875000 16025.96 43.64933
lon elev elev_cat region
1: -116.1657 690.0833 high SW
2: -116.1660 696.0000 high SW
3: -116.1660 696.0000 high SW
4: -116.1660 696.0000 high SW
5: -116.1660 696.0000 high SW
---
48317: -116.6330 741.0000 high SE
48318: -116.6330 741.0000 high SE
48319: -116.6330 741.0000 high SE
48320: -116.6331 741.0000 high SE
48321: -116.6332 741.0000 high SE
nojitter <- ggplot(data = met[1:1000,]) +
geom_point(mapping = aes(x=dew.point, y=wind.sp)) +
geom_smooth(mapping = aes(x = dew.point, y = wind.sp, linetype = region))
jitter <- ggplot(data = met[1:1000,]) +
geom_point(mapping = aes(x=dew.point, y=wind.sp, fill=region), position = "jitter")
plot_grid(nojitter, jitter, labels = "AUTO") `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 12 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 12 rows containing missing values (`geom_point()`).
Removed 12 rows containing missing values (`geom_point()`).
Although coloring is set for all regions, only southwestern regions are apparent on the graph. In the graph with jitter, there are more obvious patterns of associations. With the added line, you can easily visualize that as dew point increases, wind speed decreases.
Question 5
met_avg[!is.na(elev_cat)] %>%
ggplot() +
geom_bar(mapping = aes(x = elev_cat, color= region, fill = USAFID), position = "dodge") + scale_fill_brewer(palette = "Set2") +
labs(title = "Accent")Warning: The following aesthetics were dropped during statistical transformation: fill
ℹ This can happen when ggplot fails to infer the correct grouping structure in
the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
variable into a factor?
The graph demonstrates that hight and low elevation levels have different variations of sites by region. High elevations have more sites in the Northeast and Southwest areas, while low elevation levels have more sites in the Northeast and Northwest areas.
Question 6
l <- met_avg[!is.na(wind.sp & dew.point) ] %>%
ggplot() +
stat_summary(mapping = aes(x = dew.point, y = wind.sp, color=region),
fun.data="mean_sdl",
fun.min = min,
fun.max = max,
fun = median)lWarning: Removed 32121 rows containing missing values (`geom_segment()`).
The dew point for all regions seems to land around the same wind speed (between 0 and 10). Northwest regions have slighly larger temperatures than other regions, but this does not seem to be significantly associated.
Question 7
library(leaflet)met_avg[!is.na(rh)] USAFID day temp rh wind.sp dew.point vis.dist lat
1: 690150 1 32.59583 20.65546 2.895833 6.3000000 15958.92 34.29967
2: 690150 2 33.54167 13.18436 4.154167 1.2166667 16093.00 34.30000
3: 690150 3 34.30833 14.20129 4.433333 2.4958333 16025.96 34.30000
4: 690150 4 34.72083 11.70307 4.245833 0.5541667 16093.00 34.30000
5: 690150 5 34.77391 13.20827 3.660870 1.4956522 15953.09 34.30000
---
48694: 726813 27 20.05833 44.96461 1.579167 5.9375000 15891.88 43.65000
48695: 726813 28 21.18333 46.79024 1.175000 6.8041667 15623.67 43.65000
48696: 726813 29 24.32083 49.07651 1.733333 10.8083333 16025.96 43.65000
48697: 726813 30 24.54583 50.43012 3.508333 12.3083333 16025.96 43.64967
48698: 726813 31 24.27917 51.08570 1.208333 11.6875000 16025.96 43.64933
lon elev elev_cat region
1: -116.1657 690.0833 high SW
2: -116.1660 696.0000 high SW
3: -116.1660 696.0000 high SW
4: -116.1660 696.0000 high SW
5: -116.1660 696.0000 high SW
---
48694: -116.6330 741.0000 high SE
48695: -116.6330 741.0000 high SE
48696: -116.6330 741.0000 high SE
48697: -116.6331 741.0000 high SE
48698: -116.6332 741.0000 high SE
met2 <- met_avg[,.(rh = mean(rh,na.rm=TRUE), lat = mean(lat), lon = mean(lon)), by=c("USAFID")]
# Generating a color palette
rh.pal <- colorNumeric(c('darkgreen','goldenrod','brown'), domain=met2$dew.point)
rh.palfunction (x)
{
if (length(x) == 0 || all(is.na(x))) {
return(pf(x))
}
if (is.null(rng))
rng <- range(x, na.rm = TRUE)
rescaled <- scales::rescale(x, from = rng)
if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE))
warning("Some values were outside the color scale and will be treated as NA")
if (reverse) {
rescaled <- 1 - rescaled
}
pf(rescaled)
}
<bytecode: 0x7fd626eff900>
<environment: 0x7fd626f0bbd8>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
rhmap <- leaflet(met2) %>%
# The looks of the Map
addProviderTiles('CartoDB.Positron') %>%
# Some circles
addCircles(
lat = ~lat, lng=~lon,
# HERE IS OUR PAL!
label = ~paste0(round(rh,2), ' C'), color = ~rh.pal(rh),
opacity = 1, fillOpacity = 1, radius = 500
) %>%
# And a pretty legend
addLegend('bottomleft', pal=rh.pal, values=met2$rh,
title='Relative Humidity, C', opacity=1) %>%
addMarkers(data=met2[rank(-rh) <= 10])Assuming "lon" and "lat" are longitude and latitude, respectively
**used chatgpt to learn how to use the %>% symbol
rhmapThe relative humidity is highest on the western coast and eastern and mid-eastern states. As you approach the west, humidity drops until you reach the western coast.
Question 8
library("ggplot2")library("gganimate")base <-
ggplot (data=met_avg) +
geom_point(mapping=aes(x=dew.point, y=wind.sp))
plot(base)Warning: Removed 397 rows containing missing values (`geom_point()`).
newmap <- base +
transition_states(
region,
transition_length = 2,
state_length = 1
) +
enter_fade() +
exit_shrink() +
ease_aes('sine-in-out')newmapWarning: Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Warning: Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Warning: Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Warning: Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Warning: Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Warning: Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Warning: Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Warning: Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Warning: Removed 89 rows containing missing values (`geom_point()`).
plot(newmap)Warning: Removed 248 rows containing missing values (`geom_point()`).